library(foreign)
library(reshape2)
require(MASS)
require(dplyr)
library(forestplot)
library(stargazer)
library(nnet)
library(clusterSEs)
library(mlogit)
library(olsrr)


data<-read.csv("ISQ_replication_final.csv")


#new variable for how cfr changed; making revocations into NA
change_cfr <-  as.numeric(as.character(data$EPA_2015_tolerance)) - as.numeric(as.character(data$EPA_1996_tolerance))
data$change <-change_cfr

#creating new variable for multinomial logit
data$factor <- change_cfr
data$factor[data$factor>0 |data$EPA_2015_tolerance =="exempt"] <- "less_strict"
data$factor[is.na(data$factor)] <-"revoked"
data$factor[data$factor==0] <- "same"
data$factor[data$factor<0] <- "stricter"
data$factor<-as.factor(data$factor)
data$factor<-relevel(data$factor,ref="same")

#getting year pesticide registered, standardized
data$year_new<-2015-data$year_registered

#difference epa_96 and codex_96
data$cod96nad<- as.numeric(as.character(data$codex_1996))
data$epa96nad <- as.numeric(as.character(data$EPA_1996_tolerance))
data$diff_96_epa_codex <-as.numeric(data$cod96nad-data$epa96nad)

#creating variable for whether codex higher, lower, same, NA
data$diff_96_epa_codex[data$diff_96_epa_codex<0] <- -1
data$diff_96_epa_codex[data$diff_96_epa_codex>0] <- 1
data$diff_96_epa_codex[is.na(data$diff_96_epa_codex)] <- 9
data$diff_96_epa_codex<-as.character(data$diff_96_epa_codex)

data$diff_96_epa_codex[data$diff_96_epa_codex=="-1"] <- "codex stricter"
data$diff_96_epa_codex[data$diff_96_epa_codex=="1"] <- "codex less strict"
data$diff_96_epa_codex[data$diff_96_epa_codex=="9"] <- "no codex"
data$diff_96_epa_codex[data$diff_96_epa_codex=="0"] <- "codex same"
data$diff_96_epa_codex<-as.factor(data$diff_96_epa_codex)

data$diff_96_epa_codex<-relevel(data$diff_96_epa_codex,ref = "no codex")


#subsetting data to drop observations in which Codex existed in 2015 but not 1996
data<-subset(data,is.na(data$codex_1996)&is.na(data$codex_2015)|
               !is.na(data$codex_1996) & !is.na(data$codex_2015)|
               !is.na(data$codex_1996) & is.na(data$codex_2015))

data$diff_96_epa_codex<-relevel(data$diff_96_epa_codex,ref = "no codex")

##Getting basic summary statistics for body of paper##
summary(data$diff_96_epa_codex)

##TABLE 1 Results##
length(data$factor)
summary(data$factor)/length(data$factor)


#table 2 results"
matched<-subset(data,data$diff_96_epa_codex!="no codex")
length(matched$codex_1996)
summary(matched$diff_96_epa_codex)/length(matched$diff_96_epa_codex)


##Forest plot replication##
#estimates and confidence intervals with Codex
codex_existed <-subset(data,!is.na(data$codex_1996))
existed_probs<- codex_existed %>% group_by(factor)%>%
  summarise(prob=n()/length(codex_existed$pesticide))

#point estimates for figure 3, codex existed
ex_same<-as.numeric(existed_probs[1,2])
ex_less<-as.numeric(existed_probs[2,2])
ex_revoked<-as.numeric(existed_probs[3,2])
ex_stricter <-as.numeric(existed_probs[4,2])

#confidence intervals for figure 3, codex existed
ex_same_lo<-ex_same-1.96*sqrt(ex_same*(1-ex_same)/length(codex_existed$pesticide))
ex_same_hi<-ex_same+1.96*sqrt(ex_same*(1-ex_same)/length(codex_existed$pesticide))

ex_less_lo<-ex_less-1.96*sqrt(ex_less*(1-ex_less)/length(codex_existed$pesticide))
ex_less_hi<-ex_less+1.96*sqrt(ex_less*(1-ex_less)/length(codex_existed$pesticide))

ex_revoked_lo<-ex_revoked-1.96*sqrt(ex_revoked*(1-ex_revoked)/length(codex_existed$pesticide))
ex_revoked_hi<-ex_revoked+1.96*sqrt(ex_revoked*(1-ex_revoked)/length(codex_existed$pesticide))

ex_stricter_lo<-ex_stricter-1.96*sqrt(ex_stricter*(1-ex_stricter)/length(codex_existed$pesticide))
ex_stricter_hi<-ex_stricter+1.96*sqrt(ex_stricter*(1-ex_stricter)/length(codex_existed$pesticide))


#estimates and confidence intervals without Codex
no_codex <- subset(data,is.na(data$codex_1996))
no_codex_probs<- no_codex %>% group_by(factor)%>%
  summarise(prob=n()/length(no_codex$pesticide))

#point estimates for figure 3, codex existed
no_same<-as.numeric(no_codex_probs[1,2])
no_less<-as.numeric(no_codex_probs[2,2])
no_revoked<-as.numeric(no_codex_probs[3,2])
no_stricter <-as.numeric(no_codex_probs[4,2])

#confidence intervals for figure 3, codex existed
no_same_lo<-no_same-1.96*sqrt(no_same*(1-no_same)/length(no_codex$pesticide))
no_same_hi<-no_same+1.96*sqrt(no_same*(1-no_same)/length(no_codex$pesticide))

no_less_lo<-no_less-1.96*sqrt(no_less*(1-no_less)/length(no_codex$pesticide))
no_less_hi<-no_less+1.96*sqrt(no_less*(1-no_less)/length(no_codex$pesticide))

no_revoked_lo<-no_revoked-1.96*sqrt(no_revoked*(1-no_revoked)/length(no_codex$pesticide))
no_revoked_hi<-no_revoked+1.96*sqrt(no_revoked*(1-no_revoked)/length(no_codex$pesticide))

no_stricter_lo<-no_stricter-1.96*sqrt(no_stricter*(1-no_stricter)/length(no_codex$pesticide))
no_stricter_hi<-no_stricter+1.96*sqrt(no_stricter*(1-no_stricter)/length(no_codex$pesticide))

#Forest plot for existence vs. non-existence of Codex in 1996 without same
row_names<-list( list("Less Strict","Same","Stricter","Revoked"))
point_estimates<- data.frame(points_no_codex=c(no_less, no_same, no_stricter, no_revoked), 
                             points_codex=c(ex_less, ex_same, ex_stricter, ex_revoked))
lower_bound<- data.frame(lower_no_codex=c(no_less_lo, no_same_lo, no_stricter_lo, no_revoked_lo), 
                 lower_codex=c(ex_less_lo, ex_same_lo, ex_stricter_lo, ex_revoked_lo))
upper_bound<- data.frame(upper_no_codex=c(no_less_hi, no_same_hi, no_stricter_hi, no_revoked_hi), 
                  upper_codex=c(ex_less_hi, ex_same_hi, ex_stricter_hi, ex_revoked_hi))


coef <- with(point_estimates, cbind(points_no_codex, points_codex)) 
low <- with(lower_bound, cbind(lower_no_codex, lower_codex)) 
high <- with(upper_bound, cbind(upper_no_codex, upper_codex))


forestplot(row_names, mean=coef, lower=low, upper=high, boxsize=0.02, 
          txt_gp = fpTxtGp(label=gpar(cex=.7),xlab=gpar(cex=.7),ticks=gpar(cex=.5)),
          col=fpColors(box=c("black", "gray"), line=c("black", "gray"),
          summary=c("black", "gray")), lineheight = "auto",  xlab="Probability", 
          legend=c("No Corresponding Codex MRLs in 1996", "Corresponding  Codex MRLs in 1996"), 
          legend_args = fpLegend(title="Group", pos = list("topright", inset=.1), 
          r=unit(.2, "snpc"), gp = gpar(col="black", lwd=1.5)))


##Table 3##
model_robust<-multinom(factor~diff_96_epa_codex+
                  year_new +toxicity_who_96+ 
                  earlier_cancer_epa+ 
                  aquatic_acute+
                  aquatic_chronic+
                  fruit_veg+one_of_3_primary_acreage_crops_US+one_of_3_primary_acreage_crops_US*year_new,data= data)
stargazer(model_robust)

#observations in model
nrow(fitted(model_robust))
  


#Predicted values   
##Getting modes
Mode = function(x){
  ta = table(x)
  tam = max(ta)
  if (all(ta == tam))
    mod = NA
  else
    if(is.numeric(x))
      mod = as.numeric(names(ta)[ta == tam])
  else
    mod = names(ta)[ta == tam]
  return(mod)
}



predict_robust_values<-with(data, data.frame(diff_96_epa_codex=c("no codex","codex same","codex less strict","codex stricter"),
                                      year_new=mean(na.omit(data$year_new)),
                                      earlier_cancer_epa=Mode(data$earlier_cancer_epa),
                                      fruit_veg=Mode(data$fruit_veg),
                                      one_of_3_primary_acreage_crops_US=Mode(data$one_of_3_primary_acreage_crops_US),
                                      aquatic_acute=Mode(data$aquatic_acute),
                                      aquatic_chronic=Mode(data$aquatic_chronic),
                                      toxicity_who_96=Mode(data$toxicity_who_96)))


predict(model,predict_robust_values,type="probs")


                ###APPENDIX RESULTS###

##APPENDIX 1.1###

##getting one observation per pesticide (all classifications same for a
### given product so mean is just a convenience)
grouped_data=data %>% group_by(pesticide) %>%
  summarise(aq_ac=mean(aquatic_acute,na.rm=TRUE),
            aq_ch=mean(aquatic_chronic,na.rm=TRUE),
            tox=mean(toxicity_who_96,na.rm=TRUE),
            cancer=mean(earlier_cancer_epa,na.rm=TRUE),
            cancer_age=mean(earlier_cancer_epa_date,na.rm=TRUE))

#mean, min, max and sd values for pesticide characteristics
min(grouped_data$tox,na.rm=TRUE)
min(grouped_data$cancer,na.rm=TRUE)
min(grouped_data$aq_ch,na.rm=TRUE)
min(grouped_data$aq_ac,na.rm=TRUE)

max(grouped_data$tox,na.rm=TRUE)
max(grouped_data$cancer,na.rm=TRUE)
max(grouped_data$aq_ch,na.rm=TRUE)
max(grouped_data$aq_ac,na.rm=TRUE)

mean(grouped_data$tox,na.rm=TRUE)
mean(grouped_data$cancer,na.rm=TRUE)
mean(grouped_data$aq_ch,na.rm=TRUE)
mean(grouped_data$aq_ac,na.rm=TRUE)

sd(grouped_data$tox,na.rm=TRUE)
sd(grouped_data$cancer,na.rm=TRUE)
sd(grouped_data$aq_ch,na.rm=TRUE)
sd(grouped_data$aq_ac,na.rm=TRUE)


length(na.omit(grouped_data$tox))
length(na.omit(grouped_data$cancer))
length(na.omit(grouped_data$aq_ch))
length(na.omit(grouped_data$aq_ac))


##APPENDIX 1.2##

#info on when carcinogenicity recorded
min(na.omit(grouped_data$cancer_age))
max(na.omit(grouped_data$cancer_age))
mean(na.omit(grouped_data$cancer_age))
sd(na.omit(grouped_data$cancer_age))
hist(na.omit(grouped_data$cancer_age),breaks=10,xlab="Date Recorded",main="")

##Checking correlation between 1996 and 2009 WHO toxicity##
tox_cor<-data%>%group_by(pesticide)%>%summarise(tox_1=mean(toxicity_who_96),tox_2=mean(toxicity_who_2009))
cor(tox_cor$tox_1,tox_cor$tox_2,use="pairwise.complete.obs")

##Checking correlation between 2005 and 2018 carcinogenicity##
carcen_cor<-data%>%group_by(pesticide)%>%summarise(canc_1=mean(cancer_epa),canc_2=mean(earlier_cancer_epa))
cor(carcen_cor$canc_1,carcen_cor$canc_2,use="pairwise.complete.obs")

##APPENDIX 2###

##dropping aquatic variable##
##Table 3##
ap_model1<-multinom(factor~diff_96_epa_codex+
                         year_new +toxicity_who_96+ 
                         earlier_cancer_epa+ 
                         fruit_veg+one_of_3_primary_acreage_crops_US+one_of_3_primary_acreage_crops_US*year_new,data= data)
stargazer(ap_model1)

#observations in model
nrow(fitted(ap_model1))


##Table 4; using newest measures
ap_model2<-multinom(factor~diff_96_epa_codex+
                      year_new +toxicity_who_2009+ 
                      cancer_epa+ 
                      aquatic_acute+
                      aquatic_chronic+
                      fruit_veg+one_of_3_primary_acreage_crops_US+one_of_3_primary_acreage_crops_US*year_new,data= data)
stargazer(ap_model2)

#observations in model
nrow(fitted(ap_model2))

###APPENDIX 3##

###Regression including news articles##
#news counts data
counts<-read.csv("pesticide_news_counts_final.csv")
##merging in counts data
new_data<-merge(x=data,y=counts, by.x="pesticide", by.y="pest", all.x=TRUE)

model_news1<-multinom(factor~diff_96_epa_codex+
                        year_new +toxicity_who_96+ 
                        earlier_cancer_epa+ 
                        aquatic_acute+
                        aquatic_chronic+
                        fruit_veg+
                        one_of_3_primary_acreage_crops_US+
                        one_of_3_primary_acreage_crops_US*year_new+
                        buckets_96_05_large+
                        buckets_96_05_large*diff_96_epa_codex,data= new_data)
stargazer(model_news1)


#observations in model
nrow(fitted(model_news1))

#Number of comments submitted in response to REDs
comments <- read.csv("Reg_comments_cleaned.csv")
length(comments$pesticide)



